library("here")
library("sjPlot")
library("dplyr")
library("lme4")
library("viridis")
library("lmerTest")
library("ggplot2")
library("gridExtra")
library("gt")
source("analysis_functions.R")
mmrr_ind <- format_mmrr(here(dirname(getwd()), "p3_methods", "outputs", "mmrr_indsampling_results.csv"))
summary_hplot(mmrr_ind, "ratio_ae", colpal = "viridis", direction = -1)
summary_hplot(mmrr_ind, "ratio_err", colpal = "viridis", direction = -1)
summary_hplot(mmrr_ind, "comboenv_err", divergent = TRUE)
summary_hplot(mmrr_ind, "geo_err", divergent = TRUE)
run_lmer(mmrr_ind, "ratio_ae", table_main = "Ratio Absolute Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 9.87 | 3.29 | 3.00 | 15346 | 565.93 | 0.0*** |
| sampstrat | 1.36 | 0.45 | 3.00 | 15346 | 77.68 | 7.3 × 10−50*** |
| K | 0.82 | 0.82 | 1.00 | 15346 | 140.83 | 2.4 × 10−32*** |
| m | 11.36 | 11.36 | 1.00 | 15346 | 1.95K | 0.0*** |
| phi | 0.03 | 0.03 | 1.00 | 15346 | 4.82 | 0.028** |
| H | 0.52 | 0.52 | 1.00 | 15346 | 89.76 | 3.1 × 10−21*** |
| r | 0.49 | 0.49 | 1.00 | 15346 | 85.09 | 3.2 × 10−20*** |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
run_lmer(mmrr_ind, "ratio_err", table_main = "Ratio Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 0.06 | 0.02 | 3.00 | 15346 | 1.62 | 0.18 |
| sampstrat | 8.91 | 2.97 | 3.00 | 15346 | 229.55 | 1.1 × 10−145*** |
| K | 0.01 | 0.01 | 1.00 | 15346 | 0.45 | 0.50 |
| m | 0.01 | 0.01 | 1.00 | 15346 | 0.95 | 0.33 |
| phi | 0.00 | 0.00 | 1.00 | 15346 | 0.37 | 0.54 |
| H | 0.02 | 0.02 | 1.00 | 15346 | 1.22 | 0.27 |
| r | 0.22 | 0.22 | 1.00 | 15346 | 17.12 | 3.5 × 10−5*** |
| *** p < 0.001 | ||||||
run_lmer(mmrr_ind, "comboenv_err", table_main = "IBE Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 0.00 | 0.00 | 3.00 | 15346 | 0.72 | 0.540 |
| sampstrat | 0.72 | 0.24 | 3.00 | 15346 | 237.68 | 9.5 × 10−151*** |
| K | 0.00 | 0.00 | 1.00 | 15346 | 4.93 | 0.026** |
| m | 0.01 | 0.01 | 1.00 | 15346 | 10.41 | 1.3 × 10−3* |
| phi | 0.00 | 0.00 | 1.00 | 15346 | 0.00 | 1.000 |
| H | 0.01 | 0.01 | 1.00 | 15346 | 5.22 | 0.022** |
| r | 0.01 | 0.01 | 1.00 | 15346 | 14.66 | 1.3 × 10−4*** |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
| * p < 0.01 | ||||||
run_lmer(mmrr_ind, "geo_err", table_main = "IBD Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 0.15 | 0.05 | 3.00 | 15346 | 28.25 | 3.3 × 10−18*** |
| sampstrat | 6.13 | 2.04 | 3.00 | 15346 | 1.12K | 0.0*** |
| K | 0.00 | 0.00 | 1.00 | 15346 | 0.23 | 0.640 |
| m | 0.00 | 0.00 | 1.00 | 15346 | 0.14 | 0.710 |
| phi | 0.00 | 0.00 | 1.00 | 15346 | 1.39 | 0.240 |
| H | 0.01 | 0.01 | 1.00 | 15346 | 3.92 | 0.048** |
| r | 0.00 | 0.00 | 1.00 | 15346 | 0.22 | 0.640 |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
MEGAPLOT(mmrr_ind, "ratio_ae", colpal = "viridis", direction = -1)
MEGAPLOT(mmrr_ind, "ratio_err", colpal = "viridis", direction = -1)
MEGAPLOT(mmrr_ind, "comboenv_err", divergent = TRUE)
MEGAPLOT(mmrr_ind, "geo_err", divergent = TRUE)
mmrr_site <- format_mmrr(here(dirname(getwd()), "p3_methods", "outputs", "mmrr_sitesampling_results.csv"))
mmrr_site$ratio_ae <- abs(mmrr_site$ratio_err)
summary_hplot(mmrr_site, "ratio_ae", colpal = "viridis", direction = -1)
summary_hplot(mmrr_site, "ratio_err", divergent = TRUE)
summary_hplot(mmrr_site, "comboenv_err", divergent = TRUE)
summary_hplot(mmrr_site, "geo_err", divergent = TRUE)
run_lmer(mmrr_site, "ratio_ae", table_main = "Ratio Absolute Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 12.59 | 6.29 | 2.00 | 8628 | 441.67 | 2.4 × 10−183*** |
| sampstrat | 1.56 | 0.78 | 2.00 | 8628 | 54.60 | 2.7 × 10−24*** |
| K | 0.00 | 0.00 | 1.00 | 8628 | 0.01 | 0.910 |
| m | 0.66 | 0.66 | 1.00 | 8628 | 46.59 | 9.4 × 10−12*** |
| phi | 0.05 | 0.05 | 1.00 | 8628 | 3.78 | 0.052 |
| H | 0.40 | 0.40 | 1.00 | 8628 | 28.06 | 1.2 × 10−7*** |
| r | 0.31 | 0.31 | 1.00 | 8628 | 22.05 | 2.7 × 10−6*** |
| *** p < 0.001 | ||||||
run_lmer(mmrr_site, "ratio_err", table_main = "Ratio Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 0.04 | 0.02 | 2.00 | 8628 | 0.53 | 0.590 |
| sampstrat | 2.66 | 1.33 | 2.00 | 8628 | 38.18 | 3.1 × 10−17*** |
| K | 0.14 | 0.14 | 1.00 | 8628 | 3.90 | 0.048** |
| m | 0.48 | 0.48 | 1.00 | 8628 | 13.66 | 2.2 × 10−4*** |
| phi | 0.05 | 0.05 | 1.00 | 8628 | 1.35 | 0.250 |
| H | 6.42 | 6.42 | 1.00 | 8628 | 183.82 | 1.9 × 10−41*** |
| r | 0.06 | 0.06 | 1.00 | 8628 | 1.83 | 0.180 |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
run_lmer(mmrr_site, "comboenv_err", table_main = "IBE Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 0.00 | 0.00 | 2.00 | 8628 | 0.02 | 0.980 |
| sampstrat | 0.50 | 0.25 | 2.00 | 8628 | 51.55 | 5.5 × 10−23*** |
| K | 0.02 | 0.02 | 1.00 | 8628 | 3.18 | 0.074 |
| m | 0.01 | 0.01 | 1.00 | 8628 | 1.20 | 0.270 |
| phi | 0.01 | 0.01 | 1.00 | 8628 | 1.28 | 0.260 |
| H | 0.76 | 0.76 | 1.00 | 8628 | 157.52 | 8.1 × 10−36*** |
| r | 0.00 | 0.00 | 1.00 | 8628 | 0.86 | 0.350 |
| *** p < 0.001 | ||||||
run_lmer(mmrr_site, "geo_err", table_main = "IBD Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 0.03 | 0.02 | 2.00 | 8628 | 3.34 | 0.035*** |
| sampstrat | 0.34 | 0.17 | 2.00 | 8628 | 36.25 | 2.1 × 10−16** |
| K | 3.12 | 3.12 | 1.00 | 8628 | 657.68 | 7.5 × 10−140** |
| m | 69.60 | 69.60 | 1.00 | 8628 | 14.69K | 0.0** |
| phi | 0.48 | 0.48 | 1.00 | 8628 | 100.55 | 1.5 × 10−23** |
| H | 0.17 | 0.17 | 1.00 | 8628 | 36.06 | 2.0 × 10−9** |
| r | 0.01 | 0.01 | 1.00 | 8628 | 1.59 | 0.210 |
| *** p < 0.05 | ||||||
| ** p < 0.001 | ||||||
MEGAPLOT(mmrr_site, "ratio_ae", colpal = "viridis", direction = -1)
MEGAPLOT(mmrr_site, "ratio_err", divergent = TRUE)
MEGAPLOT(mmrr_site, "comboenv_err", divergent = TRUE)
MEGAPLOT(mmrr_site, "geo_err", divergent = TRUE)
gdm_ind <- format_gdm(here(dirname(getwd()), "p3_methods", "outputs", "gdm_indsampling_results.csv"))
summary_hplot(gdm_ind, "ratio_ae", colpal = "viridis", direction = -1)
summary_hplot(gdm_ind, "ratio_err", colpal = "viridis", direction = -1)
summary_hplot(gdm_ind, "comboenv_err", divergent = TRUE)
summary_hplot(gdm_ind, "geo_err", divergent = TRUE)
run_lmer(gdm_ind, "ratio_ae", table_main = "Ratio Absolute Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 11.69 | 3.90 | 3.00 | 15341 | 340.65 | 3.6 × 10−214*** |
| sampstrat | 0.59 | 0.20 | 3.00 | 15341 | 17.31 | 3.3 × 10−11*** |
| K | 1.88 | 1.88 | 1.00 | 15341 | 164.64 | 1.7 × 10−37*** |
| m | 23.08 | 23.08 | 1.00 | 15341 | 2.02K | 0.0*** |
| phi | 0.06 | 0.06 | 1.00 | 15341 | 4.84 | 0.028** |
| H | 0.00 | 0.00 | 1.00 | 15341 | 0.42 | 0.520 |
| r | 0.20 | 0.20 | 1.00 | 15341 | 17.56 | 2.8 × 10−5*** |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
run_lmer(gdm_ind, "ratio_err", table_main = "Ratio Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 12.34 | 4.11 | 3.00 | 15341 | 274.37 | 1.8 × 10−173*** |
| sampstrat | 0.62 | 0.21 | 3.00 | 15341 | 13.86 | 5.1 × 10−9*** |
| K | 1.25 | 1.25 | 1.00 | 15341 | 83.52 | 7.1 × 10−20*** |
| m | 19.32 | 19.32 | 1.00 | 15341 | 1.29K | 4.5 × 10−271*** |
| phi | 0.89 | 0.89 | 1.00 | 15341 | 59.33 | 1.4 × 10−14*** |
| H | 0.40 | 0.40 | 1.00 | 15341 | 26.92 | 2.1 × 10−7*** |
| r | 0.23 | 0.23 | 1.00 | 15341 | 15.22 | 9.6 × 10−5*** |
| *** p < 0.001 | ||||||
run_lmer(gdm_ind, "comboenv_err", table_main = "IBE Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 7.16 | 2.39 | 3.00 | 15341 | 541.20 | 0.0*** |
| sampstrat | 0.11 | 0.04 | 3.00 | 15341 | 8.46 | 1.3 × 10−5*** |
| K | 0.00 | 0.00 | 1.00 | 15341 | 0.09 | 0.76 |
| m | 0.66 | 0.66 | 1.00 | 15341 | 148.78 | 4.6 × 10−34*** |
| phi | 0.10 | 0.10 | 1.00 | 15341 | 22.95 | 1.7 × 10−6*** |
| H | 0.25 | 0.25 | 1.00 | 15341 | 57.20 | 4.2 × 10−14*** |
| r | 0.12 | 0.12 | 1.00 | 15341 | 26.44 | 2.8 × 10−7*** |
| *** p < 0.001 | ||||||
run_lmer(gdm_ind, "geo_err", table_main = "IBD Error")
## boundary (singular) fit: see help('isSingular')
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 106.15 | 35.38 | 3.00 | 15343 | 653.85 | 0.0*** |
| sampstrat | 32.12 | 10.71 | 3.00 | 15343 | 197.87 | 6.5 × 10−126*** |
| K | 10.24 | 10.24 | 1.00 | 15343 | 189.23 | 8.4 × 10−43*** |
| m | 11.90 | 11.90 | 1.00 | 15343 | 219.96 | 2.0 × 10−49*** |
| phi | 0.14 | 0.14 | 1.00 | 15343 | 2.55 | 0.110 |
| H | 0.27 | 0.27 | 1.00 | 15343 | 5.00 | 0.025** |
| r | 0.15 | 0.15 | 1.00 | 15343 | 2.79 | 0.095 |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
MEGAPLOT(gdm_ind, "ratio_ae", colpal = "viridis", direction = -1)
MEGAPLOT(gdm_ind, "ratio_err", colpal = "viridis", direction = -1)
MEGAPLOT(gdm_ind, "comboenv_err", divergent = TRUE)
MEGAPLOT(gdm_ind, "geo_err", divergent = TRUE)
Confirm that the distribution of NAs is as expected and the proportions are small
MEGAPLOT(gdm_ind, "geo_coeff", aggfunc = "prop_na", colpal = "mako")
gdm_site <- format_gdm(here(dirname(getwd()), "p3_methods", "outputs", "gdm_sitesampling_results.csv"))
gdm_site$ratio_ae <- abs(gdm_site$ratio_err)
summary_hplot(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)
summary_hplot(gdm_site, "ratio_err", divergent = TRUE)
summary_hplot(gdm_site, "comboenv_err", divergent = TRUE)
summary_hplot(gdm_site, "geo_err", divergent = TRUE)
run_lmer(gdm_site, "ratio_ae", table_main = "Ratio Absolute Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 18.83 | 9.42 | 2.00 | 8342.001 | 308.59 | 5.1 × 10−130*** |
| sampstrat | 11.50 | 5.75 | 2.00 | 8342.001 | 188.41 | 9.3 × 10−81*** |
| K | 0.09 | 0.09 | 1.00 | 8342.001 | 2.90 | 0.089 |
| m | 0.35 | 0.35 | 1.00 | 8342.000 | 11.52 | 6.9 × 10−4*** |
| phi | 0.00 | 0.00 | 1.00 | 8342.001 | 0.01 | 0.930 |
| H | 0.00 | 0.00 | 1.00 | 8342.000 | 0.00 | 0.960 |
| r | 0.16 | 0.16 | 1.00 | 8342.004 | 5.24 | 0.022** |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
run_lmer(gdm_site, "ratio_err", table_main = "Ratio Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 19.92 | 9.96 | 2.00 | 8342.001 | 283.78 | 5.8 × 10−120*** |
| sampstrat | 13.76 | 6.88 | 2.00 | 8342.001 | 196.00 | 6.5 × 10−84*** |
| K | 0.05 | 0.05 | 1.00 | 8342.001 | 1.34 | 0.250 |
| m | 0.02 | 0.02 | 1.00 | 8342.000 | 0.71 | 0.400 |
| phi | 0.21 | 0.21 | 1.00 | 8342.001 | 5.89 | 0.015** |
| H | 0.27 | 0.27 | 1.00 | 8342.000 | 7.79 | 5.3 × 10−3* |
| r | 0.20 | 0.20 | 1.00 | 8342.004 | 5.83 | 0.016** |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
| * p < 0.01 | ||||||
run_lmer(gdm_site, "comboenv_err", table_main = "IBE Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 11.64 | 5.82 | 2.00 | 8342.001 | 309.03 | 3.4 × 10−130*** |
| sampstrat | 1.53 | 0.76 | 2.00 | 8342.001 | 40.53 | 3.0 × 10−18*** |
| K | 0.19 | 0.19 | 1.00 | 8342.001 | 10.29 | 1.3 × 10−3** |
| m | 0.84 | 0.84 | 1.00 | 8342.000 | 44.77 | 2.4 × 10−11*** |
| phi | 0.06 | 0.06 | 1.00 | 8342.001 | 3.40 | 0.065 |
| H | 0.04 | 0.04 | 1.00 | 8342.000 | 2.00 | 0.160 |
| r | 0.13 | 0.13 | 1.00 | 8342.003 | 6.85 | 8.9 × 10−3** |
| *** p < 0.001 | ||||||
| ** p < 0.01 | ||||||
run_lmer(gdm_site, "geo_err", table_main = "IBD Error")
| Predictors | Sum Sq | Mean Sq | NumDF | DenDF | F value | Pr(>F) |
|---|---|---|---|---|---|---|
| nsamp | 7.21 | 3.60 | 2.00 | 8342.015 | 11.60 | 9.3 × 10−6*** |
| sampstrat | 700.45 | 350.22 | 2.00 | 8342.021 | 1.13K | 0.0*** |
| K | 363.91 | 363.91 | 1.00 | 8342.020 | 1.17K | 2.6 × 10−240*** |
| m | 2.00K | 2.00K | 1.00 | 8342.002 | 6.43K | 0.0*** |
| phi | 16.23 | 16.23 | 1.00 | 8342.017 | 52.22 | 5.4 × 10−13*** |
| H | 1.26 | 1.26 | 1.00 | 8342.006 | 4.05 | 0.044** |
| r | 1.36 | 1.36 | 1.00 | 8342.053 | 4.39 | 0.036** |
| *** p < 0.001 | ||||||
| ** p < 0.05 | ||||||
MEGAPLOT(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)
MEGAPLOT(gdm_site, "ratio_err", divergent = TRUE)
MEGAPLOT(gdm_site, "comboenv_err", divergent = TRUE)
MEGAPLOT(gdm_site, "geo_err", divergent = TRUE)
Confirm that the distribution of NAs is as expected and the proportions are small
MEGAPLOT(gdm_site, "geo_coeff", aggfunc = "prop_na", colpal = "mako")
plot(mmrr_ind$geo_coeff, gdm_ind$geo_coeff, col = gdm_ind$m)
legend("topleft", c("0.25", "1"), col = c("black", "red"), pch = 1)
plot(mmrr_ind$comboenv_coeff, gdm_ind$comboenv_coeff)
df <- data.frame(mmrr_ind,
geo_mg = gdm_ind$geo_coeff/mmrr_ind$geo_coeff,
env_mg = gdm_ind$comboenv_coeff/mmrr_ind$comboenv_coeff,
ratio_mg = gdm_ind$ratio/mmrr_ind$ratio )
summary_hplot(df, "geo_mg")
summary_hplot(df, "env_mg")
summary_hplot(df, "ratio_mg")
MEGAPLOT(df, "geo_mg")
MEGAPLOT(df, "env_mg")
MEGAPLOT(df, "ratio_mg")
plot(mmrr_site$geo_coeff, gdm_site$geo_coeff, col = gdm_site$m)
legend("topleft", c("0.25", "1"), col = c("black", "red"), pch = 1)
plot(mmrr_site$comboenv_coeff, gdm_site$comboenv_coeff)
df <- data.frame(mmrr_site,
geo_mg = gdm_site$geo_coeff/mmrr_site$geo_coeff,
env_mg = gdm_site$comboenv_coeff/mmrr_site$comboenv_coeff,
ratio_mg = gdm_site$ratio/mmrr_site$ratio )
summary_hplot(df, "geo_mg")
summary_hplot(df, "env_mg")
summary_hplot(df, "ratio_mg")